From 5b9331a6f5e271ab636560b46e079ed7a0d4f17b Mon Sep 17 00:00:00 2001
From: "kaf24@scramble.cl.cam.ac.uk"
Date: Tue, 18 Nov 2003 17:49:13 +0000
Subject: [PATCH] bitkeeper revision 1.608.1.1 (3fba5b99WMvlBA7JwJeGU5vakf_qWg)

event_channel.c:
  new file
sched.h, mm.h, event.h, hypervisor-if.h, domain.c, entry.S:
  Event channels between domains. Also do not reschedule a domain if a particular guest event is already pending.
---
 .rootkeys                                  |   1 +
 xen/arch/i386/entry.S                      |   3 +-
 xen/common/domain.c                        |   4 +
 xen/common/event_channel.c                 | 388 +++++++++++++++++++++
 xen/include/hypervisor-ifs/hypervisor-if.h |  43 ++-
 xen/include/xeno/event.h                   |  22 +-
 xen/include/xeno/mm.h                      |   2 +-
 xen/include/xeno/sched.h                   |  17 ++
 8 files changed, 469 insertions(+), 11 deletions(-)
 create mode 100644 xen/common/event_channel.c

diff --git a/.rootkeys b/.rootkeys
index b96338a42b..d790137e75 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -247,6 +247,7 @@
 3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c
 3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/common/domain_page.c
 3ddb79bdeyutmaXEfpQvvxj7eQ0fCw xen/common/event.c
+3fba5b96H0khoxNiKbjdi0inpXV-Pw xen/common/event_channel.c
 3ddb79bd9drcFPVxd4w2GPOIjLlXpA xen/common/kernel.c
 3e4cd9d8LAAghUY0hNIK72uc2ch_Nw xen/common/keyhandler.c
 3ddb79bduhSEZI8xa7IbGQCpap5y2A xen/common/lib.c
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
index 1c828bc1ab..e06c565de7 100644
--- a/xen/arch/i386/entry.S
+++ b/xen/arch/i386/entry.S
@@ -725,6 +725,7 @@ ENTRY(hypervisor_call_table)
         .long SYMBOL_NAME(do_multicall)
         .long SYMBOL_NAME(do_kbd_op)
         .long SYMBOL_NAME(do_update_va_mapping)
+        .long SYMBOL_NAME(do_event_channel_op)
         .rept NR_syscalls-((.-hypervisor_call_table)/4)
         .long SYMBOL_NAME(sys_ni_syscall)
-	.endr
+        .endr
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 9edea30a8b..2888e62417 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -52,6 +52,7 @@ struct task_struct *do_createdomain(unsigned int dom_id, unsigned int cpu)
 
     spin_lock_init(&p->blk_ring_lock);
     spin_lock_init(&p->page_lock);
+    spin_lock_init(&p->event_channel_lock);
 
     p->shared_info = (void *)get_free_page(GFP_KERNEL);
     memset(p->shared_info, 0, PAGE_SIZE);
@@ -288,6 +289,8 @@ void free_all_dom_mem(struct task_struct *p)
 /* Release resources belonging to task @p. */
 void release_task(struct task_struct *p)
 {
+    extern void destroy_event_channels(struct task_struct *);
+
     ASSERT(p->state == TASK_DYING);
     ASSERT(!p->has_cpu);
 
@@ -300,6 +303,7 @@ void release_task(struct task_struct *p)
     destroy_blkdev_info(p);
 
     /* Free all memory associated with this domain. */
+    destroy_event_channels(p);
     free_page((unsigned long)p->mm.perdomain_pt);
     UNSHARE_PFN(virt_to_page(p->shared_info));
     free_page((unsigned long)p->shared_info);
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
new file mode 100644
index 0000000000..052dc793e3
--- /dev/null
+++ b/xen/common/event_channel.c
@@ -0,0 +1,388 @@
+/******************************************************************************
+ * event_channel.c
+ *
+ * Event channels between domains.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* NOTE(review): header names were lost from the reviewed copy of this patch; */
+/* the six names below are a best-effort reconstruction -- please confirm.    */
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/lib.h>
+#include <xeno/errno.h>
+#include <xeno/sched.h>
+#include <xeno/event.h>
+
+
+/*
+ * EVTCHNOP_open: allocate a local channel descriptor targeting @target_dom.
+ * If the target domain already holds an in-use descriptor targeting the
+ * caller, both ends are marked connected and the remote channel is flagged
+ * pending. Returns 0 on success, else -EINVAL, -EEXIST, -ENOSPC or -ENOMEM.
+ */
+static long event_channel_open(u16 target_dom)
+{
+    struct task_struct *lp = current, *rp;
+    int                 i, lmax, rmax, lid, rid;
+    event_channel_t    *lchn, *rchn;
+    shared_info_t      *rsi;
+    unsigned long       cpu_mask;
+    long                rc = 0;
+
+    rp = find_domain_by_id(target_dom);
+
+    /* A channel to ourself would take event_channel_lock twice below. */
+    if ( unlikely(rp == lp) )
+    {
+        put_task_struct(rp);
+        return -EINVAL;
+    }
+
+    /*
+     * We need locks at both ends to make a connection. We avoid deadlock
+     * by acquiring the locks in address order.
+     */
+    if ( (unsigned long)lp < (unsigned long)rp )
+    {
+        spin_lock(&lp->event_channel_lock);
+        spin_lock(&rp->event_channel_lock);
+    }
+    else
+    {
+        if ( likely(rp != NULL) )
+            spin_lock(&rp->event_channel_lock);
+        spin_lock(&lp->event_channel_lock);
+    }
+
+    lmax = lp->max_event_channel;
+    lchn = lp->event_channel;
+    lid  = -1;
+
+    /*
+     * Find the first unused event channel. Also ensure no channel already
+     * exists to the specified target domain.
+     */
+    for ( i = 0; i < lmax; i++ )
+    {
+        if ( !(lchn[i].flags & ECF_INUSE) )
+        {
+            if ( lid == -1 ) lid = i;
+        }
+        else if ( unlikely(lchn[i].target_dom == target_dom) )
+        {
+            rc = -EEXIST;
+            goto out;
+        }
+    }
+
+    /* If there is no free slot we need to allocate a bigger channel list. */
+    if ( unlikely(lid == -1) )
+    {
+        /* Reached maximum channel count? */
+        if ( unlikely(lmax == 1024) )
+        {
+            rc = -ENOSPC;
+            goto out;
+        }
+
+        /* The first slot of the extension is the one we will use. */
+        lid  = lmax;
+        lmax = (lmax == 0) ? 4 : (lmax * 2);
+
+        lchn = kmalloc(lmax * sizeof(event_channel_t), GFP_KERNEL);
+        if ( unlikely(lchn == NULL) )
+        {
+            rc = -ENOMEM;
+            goto out;
+        }
+
+        memset(lchn, 0, lmax * sizeof(event_channel_t));
+
+        /* Carry the existing descriptors over before freeing the old list. */
+        if ( likely(lp->event_channel != NULL) )
+        {
+            for ( i = 0; i < lid; i++ )
+                lchn[i] = lp->event_channel[i];
+            kfree(lp->event_channel);
+        }
+
+        lp->event_channel     = lchn;
+        lp->max_event_channel = lmax;
+    }
+
+    lchn[lid].target_dom = target_dom;
+    lchn[lid].flags      = ECF_INUSE;
+
+    if ( likely(rp != NULL) )
+    {
+        rchn = rp->event_channel;
+        rmax = rp->max_event_channel;
+
+        for ( rid = 0; rid < rmax; rid++ )
+        {
+            if ( (rchn[rid].target_dom == lp->domain) &&
+                 (rchn[rid].flags & ECF_INUSE) )
+            {
+                /*
+                 * The target was awaiting a connection. We make the connection
+                 * and send a connection-made event to the remote end.
+                 */
+                rchn[rid].flags = ECF_INUSE | ECF_CONNECTED | lid;
+                lchn[lid].flags = ECF_INUSE | ECF_CONNECTED | rid;
+
+                rsi = rp->shared_info;
+                if ( !test_and_set_bit(rid,    &rsi->event_channel_pend[0]) &&
+                     !test_and_set_bit(rid>>5, &rsi->event_channel_pend_sel) )
+                {
+                    cpu_mask = mark_guest_event(rp, _EVENT_EVTCHN);
+                    guest_event_notify(cpu_mask);
+                }
+
+                break;
+            }
+        }
+    }
+
+ out:
+    spin_unlock(&lp->event_channel_lock);
+    if ( rp != NULL )
+    {
+        spin_unlock(&rp->event_channel_lock);
+        put_task_struct(rp);
+    }
+
+    return rc;
+}
+
+
+/*
+ * Close channel @lid of domain @lp. If the channel is connected, the remote
+ * descriptor is returned to the in-use-but-unconnected state and the remote
+ * domain's 'disconnected' bit for it is set. Returns 0 or -EINVAL.
+ */
+static long close_event_channel(struct task_struct *lp, u16 lid)
+{
+    struct task_struct *rp = NULL;
+    event_channel_t    *lchn, *rchn;
+    u16                 rid;
+    shared_info_t      *rsi;
+    unsigned long       cpu_mask;
+    long                rc = 0;
+
+ again:
+    spin_lock(&lp->event_channel_lock);
+
+    lchn = lp->event_channel;
+
+    if ( unlikely(lid >= lp->max_event_channel) ||
+         unlikely(!(lchn[lid].flags & ECF_INUSE)) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    if ( lchn[lid].flags & ECF_CONNECTED )
+    {
+        if ( rp == NULL )
+        {
+            rp = find_domain_by_id(lchn[lid].target_dom);
+            ASSERT(rp != NULL);
+
+            if ( (unsigned long)lp < (unsigned long)rp )
+            {
+                spin_lock(&rp->event_channel_lock);
+            }
+            else
+            {
+                /* Locks are taken in address order: drop ours and retry. */
+                spin_unlock(&lp->event_channel_lock);
+                spin_lock(&rp->event_channel_lock);
+                goto again;
+            }
+        }
+        else if ( rp->domain != lchn[lid].target_dom )
+        {
+            /* The channel was re-targeted while our lock was dropped. */
+            rc = -EINVAL;
+            goto out;
+        }
+
+        rchn = rp->event_channel;
+        rid  = lchn[lid].flags & ECF_TARGET_ID;
+        ASSERT(rid < rp->max_event_channel);
+        ASSERT(rchn[rid].flags == (ECF_INUSE | ECF_CONNECTED | lid));
+        ASSERT(rchn[rid].target_dom == lp->domain);
+
+        rchn[rid].flags = ECF_INUSE;
+
+        rsi = rp->shared_info;
+        if ( !test_and_set_bit(rid,    &rsi->event_channel_disc[0]) &&
+             !test_and_set_bit(rid>>5, &rsi->event_channel_disc_sel) )
+        {
+            cpu_mask = mark_guest_event(rp, _EVENT_EVTCHN);
+            guest_event_notify(cpu_mask);
+        }
+    }
+
+    lchn[lid].target_dom = 0;
+    lchn[lid].flags      = 0;
+
+ out:
+    spin_unlock(&lp->event_channel_lock);
+    if ( rp != NULL )
+    {
+        spin_unlock(&rp->event_channel_lock);
+        put_task_struct(rp);
+    }
+
+    return rc;
+}
+
+
+/* EVTCHNOP_close: close channel @lid of the calling domain. */
+static long event_channel_close(u16 lid)
+{
+    return close_event_channel(current, lid);
+}
+
+
+/*
+ * EVTCHNOP_send: flag the remote end of connected channel @lid as pending.
+ * Returns 0 on success, else -EINVAL.
+ */
+static long event_channel_send(u16 lid)
+{
+    struct task_struct *lp = current, *rp;
+    event_channel_t    *lchn, *rchn;
+    u16                 rid;
+    shared_info_t      *rsi;
+    unsigned long       cpu_mask;
+
+    spin_lock(&lp->event_channel_lock);
+
+    lchn = lp->event_channel;
+
+    if ( unlikely(lid >= lp->max_event_channel) ||
+         unlikely((lchn[lid].flags & (ECF_INUSE|ECF_CONNECTED)) !=
+                  (ECF_INUSE|ECF_CONNECTED)) )
+    {
+        spin_unlock(&lp->event_channel_lock);
+        return -EINVAL;
+    }
+
+    rid = lchn[lid].flags & ECF_TARGET_ID;
+    rp  = find_domain_by_id(lchn[lid].target_dom);
+    ASSERT(rp != NULL);
+
+    spin_unlock(&lp->event_channel_lock);
+
+    spin_lock(&rp->event_channel_lock);
+
+    rchn = rp->event_channel;
+
+    if ( unlikely(rid >= rp->max_event_channel) )
+    {
+        spin_unlock(&rp->event_channel_lock);
+        put_task_struct(rp);
+        return -EINVAL;
+    }
+
+    rsi = rp->shared_info;
+    if ( !test_and_set_bit(rid,    &rsi->event_channel_pend[0]) &&
+         !test_and_set_bit(rid>>5, &rsi->event_channel_pend_sel) )
+    {
+        cpu_mask = mark_guest_event(rp, _EVENT_EVTCHN);
+        guest_event_notify(cpu_mask);
+    }
+
+    spin_unlock(&rp->event_channel_lock);
+    put_task_struct(rp);
+    return 0;
+}
+
+
+/*
+ * EVTCHNOP_status: report the state of local channel @lid as one of the
+ * EVTCHNSTAT_* values. An out-of-range @lid is reported as closed.
+ */
+static long event_channel_status(u16 lid)
+{
+    struct task_struct *lp = current;
+    event_channel_t    *lchn;
+    long                rc = EVTCHNSTAT_closed;
+
+    spin_lock(&lp->event_channel_lock);
+
+    lchn = lp->event_channel;
+
+    if ( lid < lp->max_event_channel )
+    {
+        if ( (lchn[lid].flags & (ECF_INUSE|ECF_CONNECTED)) == ECF_INUSE )
+            rc = EVTCHNSTAT_disconnected;
+        else if ( lchn[lid].flags & ECF_INUSE )
+            rc = EVTCHNSTAT_connected;
+    }
+
+    spin_unlock(&lp->event_channel_lock);
+    return rc;
+}
+
+
+/*
+ * Hypercall entry point: dispatch EVTCHNOP_* command @cmd. @id is the target
+ * domain for EVTCHNOP_open and a local channel index otherwise; it is
+ * truncated to 16 bits. Unknown commands return -ENOSYS.
+ */
+long do_event_channel_op(unsigned int cmd, unsigned int id)
+{
+    long rc;
+
+    switch ( cmd )
+    {
+    case EVTCHNOP_open:
+        rc = event_channel_open((u16)id);
+        break;
+
+    case EVTCHNOP_close:
+        rc = event_channel_close((u16)id);
+        break;
+
+    case EVTCHNOP_send:
+        rc = event_channel_send((u16)id);
+        break;
+
+    case EVTCHNOP_status:
+        rc = event_channel_status((u16)id);
+        break;
+
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
+}
+
+
+/* Try to close every slot of @p's channel list, then free the list. */
+void destroy_event_channels(struct task_struct *p)
+{
+    int i;
+    if ( p->event_channel != NULL )
+    {
+        for ( i = 0; i < p->max_event_channel; i++ )
+            (void)close_event_channel(p, (u16)i);
+        kfree(p->event_channel);
+    }
+}
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
index 5bc9bf6b11..5bd13dba9b 100644
--- a/xen/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -60,6 +60,7 @@
 #define __HYPERVISOR_multicall             17
 #define __HYPERVISOR_kbd_op                18
 #define __HYPERVISOR_update_va_mapping     19
+#define __HYPERVISOR_event_channel_op      20
 
 /* And the trap vector is... */
 #define TRAP_INSTR "int $0x82"
@@ -91,6 +92,7 @@
 #define EVENT_NET      0x10 /* There are packets for transmission. */
 #define EVENT_PS2      0x20 /* PS/2 keyboard or mouse event(s) */
 #define EVENT_STOP     0x40 /* Prepare for stopping and possible pickling */
+#define EVENT_EVTCHN   0x80 /* Event pending on an event channel */
 
 /* Bit offsets, as opposed to the above masks. */
 #define _EVENT_BLKDEV   0
@@ -100,6 +102,7 @@
 #define _EVENT_NET      4
 #define _EVENT_PS2      5
 #define _EVENT_STOP     6
+#define _EVENT_EVTCHN   7
 
 /*
  * Virtual addresses beyond this are not modifiable by guest OSes. The
@@ -160,7 +163,21 @@
 #define SCHEDOP_exit            1
 #define SCHEDOP_stop            2
 
-
+/*
+ * EVTCHNOP_* - Event channel operations.
+ */
+#define EVTCHNOP_open           0  /* Open channel to domain 'id'.  */
+#define EVTCHNOP_close          1  /* Close channel 'id'.           */
+#define EVTCHNOP_send           2  /* Send event on channel 'id'.   */
+#define EVTCHNOP_status         3  /* Get status of channel 'id'.   */
+
+/*
+ * EVTCHNSTAT_* - Non-error return values from EVTCHNOP_status.
+ */
+#define EVTCHNSTAT_closed       0  /* Channel is not in use.              */
+#define EVTCHNSTAT_disconnected 1  /* Channel is not connected to remote. */
+#define EVTCHNSTAT_connected    2  /* Channel is connected to remote.     */
+
 #ifndef __ASSEMBLY__
 
 
@@ -237,6 +254,30 @@ typedef struct shared_info_st {
      */
     unsigned long events_mask;
 
+    /*
+     * A domain can have up to 1024 bidirectional event channels to/from other
+     * domains. Domains must agree out-of-band to set up a connection, and then
+     * each must explicitly request a connection to the other. When both have
+     * made the request the channel is fully allocated and set up.
+     *
+     * An event channel is a single sticky 'bit' of information. Setting the
+     * sticky bit also causes an upcall into the target domain. In this way
+     * events can be seen as an IPI [Inter-Process(or) Interrupt].
+     *
+     * A guest can see which of its event channels are pending by reading the
+     * 'event_channel_pend' bitfield. To avoid a linear scan of the entire
+     * bitfield there is a 'selector' which indicates which words in the
+     * bitfield contain at least one set bit.
+     *
+     * There is a similar bitfield to indicate which event channels have been
+     * disconnected by the remote end. There is also a 'selector' for this
+     * field.
+     */
+    u32 event_channel_pend[32];
+    u32 event_channel_pend_sel;
+    u32 event_channel_disc[32];
+    u32 event_channel_disc_sel;
+
     /*
      * Time: The following abstractions are exposed: System Time, Clock Time,
      * Domain Virtual Time. Domains can access Cycle counter time directly.
diff --git a/xen/include/xeno/event.h b/xen/include/xeno/event.h
index fdb9fed24d..c733dc46fb 100644
--- a/xen/include/xeno/event.h
+++ b/xen/include/xeno/event.h
@@ -28,7 +28,8 @@
  */
 static inline unsigned long mark_guest_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->shared_info->events);
+    if ( test_and_set_bit(event, &p->shared_info->events) )
+        return 0;
 
     /*
      * No need for the runqueue_lock! The check below does not race
@@ -46,7 +47,8 @@ static inline unsigned long mark_guest_event(struct task_struct *p, int event)
 /* As above, but hyp_events are handled within the hypervisor. */
 static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->hyp_events);
+    if ( test_and_set_bit(event, &p->hyp_events) )
+        return 0;
     smp_mb();
     if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
     reschedule(p);
@@ -64,17 +66,21 @@ static inline void guest_event_notify(unsigned long cpu_mask)
 
 static inline unsigned long mark_guest_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->shared_info->events);
-    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
-    reschedule(p);
+    if ( !test_and_set_bit(event, &p->shared_info->events) )
+    {
+        if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+        reschedule(p);
+    }
     return 0;
 }
 
 static inline unsigned long mark_hyp_event(struct task_struct *p, int event)
 {
-    set_bit(event, &p->hyp_events);
-    if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
-    reschedule(p);
+    if ( !test_and_set_bit(event, &p->hyp_events) )
+    {
+        if ( p->state == TASK_INTERRUPTIBLE ) wake_up(p);
+        reschedule(p);
+    }
     return 0;
 }
 
diff --git a/xen/include/xeno/mm.h b/xen/include/xeno/mm.h
index 6d0f6bf6fd..d565583d6a 100644
--- a/xen/include/xeno/mm.h
+++ b/xen/include/xeno/mm.h
@@ -78,7 +78,7 @@ typedef struct pfn_info {
 #define page_type_count(p)       ((p)->type_count)
 #define set_page_type_count(p,v) ((p)->type_count = v)
 
-#define PG_domain_mask 0x00ffffff /* owning domain (24 bits) */
+#define PG_domain_mask MAX_DOMAIN_ID /* owning domain (16 bits) */
 /* hypervisor flags (domain == 0) */
 #define PG_slab        24
 /* domain flags (domain != 0) */
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index 812336e725..d4caca2cd2 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -43,6 +43,18 @@ extern struct mm_struct init_mm;
 /* SMH: replace below when have explicit 'priv' flag or bitmask */
 #define IS_PRIV(_p) ((_p)->domain == 0)
 
+#define DOMAIN_ID_BITS (16)
+#define MAX_DOMAIN_ID  ((1<<(DOMAIN_ID_BITS))-1)
+
+typedef struct event_channel_st
+{
+    u16 target_dom; /* Target domain (i.e. domain at remote end). */
+#define ECF_TARGET_ID ((1<<10)-1) /* Channel identifier at remote end.    */
+#define ECF_INUSE     (1<<10)     /* Is this channel descriptor in use?   */
+#define ECF_CONNECTED (1<<11)     /* Is this channel connected to remote? */
+    u16 flags;
+} event_channel_t;
+
 struct task_struct
 {
     /*
@@ -129,6 +141,11 @@ struct task_struct
     struct thread_struct thread;
     struct task_struct *prev_task, *next_task, *next_hash;
 
+    /* Event channel information. */
+    event_channel_t *event_channel;
+    unsigned int     max_event_channel;
+    spinlock_t       event_channel_lock;
+
     unsigned long flags;
 
     atomic_t refcnt;
-- 
2.30.2